/*************************************************************************
 * The contents of this file are subject to the MYRICOM MYRINET          *
 * EXPRESS (MX) NETWORKING SOFTWARE AND DOCUMENTATION LICENSE (the       *
 * "License"); User may not use this file except in compliance with the  *
 * License.  The full text of the License can found in LICENSE.TXT       *
 *                                                                       *
 * Software distributed under the License is distributed on an "AS IS"   *
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See  *
 * the License for the specific language governing rights and            *
 * limitations under the License.                                        *
 *                                                                       *
 * Copyright 2003 - 2004 by Myricom, Inc.  All rights reserved.          *
 *************************************************************************/

/*
 * Revision identification string embedded in the object file.  The text
 * carries an expanded "$Id$" keyword for receive.c, prefixed by "@(#)".
 * NOTE(review): nothing in the visible part of this file references it;
 * presumably it exists only so the revision can be recovered from a built
 * binary -- confirm before removing.
 */
static const char __idstring[] = "@(#)$Id: receive.c,v 1.37 2005/06/29 00:23:16 eugene Exp $";

/*
 *  Routines receiving messages
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "mx_auto_config.h"
#include "myriexpress.h"
#include "internal.h"
#include "rdma.h"

/* local prototypes */

/*
 * Look through an endpoint's list of posted receives for the first one
 * that accepts an incoming message.
 *
 *   endpoint    endpoint whose recv_list is walked
 *   match_info  match bits carried by the incoming message
 *   type        send/receive type of the incoming message; an ISSEND is
 *               looked up as if it were a plain SEND
 *
 * A posted receive accepts the message when the message's match bits,
 * masked with the receive's own match_mask, equal the receive's
 * match_info and the types agree.
 *
 * Returns the matching post, or NULL when the list holds no match.
 */
struct mx_post *
mx_find_recv(struct mx_endpoint *endpoint, uint64_t match_info,
             mx_sr_type_t type)
{
  struct mx_post *cur;
  struct mx_lib_recv *posted;
  mx_sr_type_t wanted;

  /* synchronous sends match against ordinary send receives */
  wanted = (type == MX_SR_TYPE_ISSEND) ? MX_SR_TYPE_SEND : type;

  /* walk the circular list until we are back at its head */
  cur = endpoint->recv_list.next;
  while (cur != &endpoint->recv_list) {
    posted = &cur->ts.recv;

    if ((wanted != posted->type) ||
        ((match_info & posted->match_mask) != posted->match_info)) {
      cur = cur->next;
      continue;
    }

    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("Found matching receive for match_data = 0x%08x%08x, h=%p\n",
              MX_U32(match_info), MX_L32(match_info), cur));
    return cur;
  }

  /* walked the whole list without a hit */
  return NULL;
}

/*
 * Complete a posted receive with a message that is waiting on the peer's
 * receive socket.
 *
 *   post            the posted receive being satisfied; the payload is
 *                   read into the segments of post->ts.recv
 *   ep              peer descriptor; data is read from ep->recv_sock and
 *                   ep->address is recorded as the message source
 *   len             length in bytes of the incoming message
 *   match_info      match bits of the incoming message (stored in status)
 *   type            type of the incoming message; MX_SR_TYPE_ISSEND
 *                   triggers an acknowledgement to the sender
 *   sender_request  sender-side request handle echoed back in that ack
 *
 * At most recv->length bytes are delivered; any excess is read into a
 * scratch buffer and discarded, and the status is marked TRUNCATED.  For
 * a GET_DATA receive every segment must be approved by mx_rdma_allowed();
 * if one is not, nothing is delivered, the whole message is discarded and
 * the status is marked REJECTED.
 *
 * On success the post is taken off its current list, flagged complete,
 * put on endpoint->completed_posts, and both post->wait_cond and
 * endpoint->peek_cond are signalled (all under Mx_po_lock).
 *
 * If a socket read fails, the receive socket is closed, a message is
 * written to stderr and the function returns with the post left where it
 * was and not marked complete.
 */
void
mx_fulfill_recv(struct mx_post *post,
                struct mx_address_desc *ep,
                uint32_t len,
                uint64_t match_info,
                mx_sr_type_t type,
                mx_request_t sender_request)
{
  uint32_t nleft;
  uint32_t xfer_len;
  uint32_t i;
  int n;
  int drain;
  char drain_buf[256];
  struct mx_lib_recv *recv;
  struct mx_endpoint *endpoint;
  int rdma_fail = 0;

  /* access receive-specific part of post */
  recv = &post->ts.recv;
  endpoint = post->endpoint;

  /* decide how much is delivered and how much is thrown away */
  xfer_len = MIN(len, recv->length);
  nleft = xfer_len;
  drain = len - xfer_len;

  if (recv->type == MX_SR_TYPE_GET_DATA){
    /* every target segment must be open for remote writes */
    for(i = 0; i < recv->seg_cnt; i++){
      if (!mx_rdma_allowed(MX_UINT64(recv->seg_list[i].segment_ptr),
                           recv->seg_list[i].segment_length,
                           MX_RDMA_WRITE)){
        break;
      }
    }
    if (i != recv->seg_cnt){
      MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("RDMA not allowed\n"));
      /* deliver nothing; the entire message goes to the drain loop */
      nleft = 0;
      drain = len;
      xfer_len = 0;
      rdma_fail = 1;
    } else {
      MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("RDMA allowed\n"));
    }
  }

  i = 0;
  /* check on i should be redundant */
  while ((nleft > 0) && (i < recv->seg_cnt)) {
    n = MIN(nleft, recv->seg_list[i].segment_length);

    /*
     * Allow for 0-length segments.  The segment index must be advanced
     * here: a bare "continue" would re-test the same empty segment
     * forever.
     */
    if (n == 0) {
      ++i;
      continue;
    }

    n = mx_sock_read(ep->recv_sock, recv->seg_list[i].segment_ptr, n);
    if (n <= 0) {
      mx_close_recv_socket(endpoint, ep);
      fprintf(stderr, "Error receiving message match=0x%08x%08x\n",
              MX_U32(recv->match_info), MX_L32(recv->match_info));
      return;
    }

    nleft -= n;		/* update remaining byte count */
    ++i;		/* move to next segment */
  }

  /* drop rest of message on the floor */
  while (drain > 0) {
    n = MIN(drain, (int)sizeof(drain_buf));
    n = mx_sock_read(ep->recv_sock, drain_buf, n);
    if (n <= 0) {
      mx_close_recv_socket(endpoint, ep);
      fprintf(stderr, "Error draining message match=0x%08x%08x\n",
              MX_U32(recv->match_info), MX_L32(recv->match_info));
      return;
    }
    drain -= n;
  }

  /* Save status information */
  post->status.source = ep->address;
  post->status.msg_length = len;
  post->status.xfer_length = xfer_len;
  post->status.match_info = match_info;
  if (rdma_fail){
    post->status.code = MX_STATUS_REJECTED;
  } else if (post->status.xfer_length < post->status.msg_length) {
    post->status.code = MX_STATUS_TRUNCATED;
  } else {
    post->status.code = MX_STATUS_SUCCESS;
  }

  /* a synchronous send is acknowledged once it has been matched */
  if (type == MX_SR_TYPE_ISSEND){
    mx_post_issend_ack(endpoint, ep->address, sender_request);
  }

  /* remove from active list */
  pthread_mutex_lock(&Mx_po_lock);
  MX_LIST_REMOVE(post);

  /* mark post as completed and move to completed list */
  post->complete = 1;
  MX_LIST_INSERT(&endpoint->completed_posts, post);
  pthread_cond_signal(&post->wait_cond);
  pthread_cond_signal(&endpoint->peek_cond);
  pthread_mutex_unlock(&Mx_po_lock);
}

/*
 * Buffer an incoming message for which no receive is posted yet.
 *
 *   endpoint        endpoint whose queued_messages list gets the message
 *   ep              peer descriptor; the payload is read from
 *                   ep->recv_sock and ep->address is stored as the source
 *   match_info      match bits of the message
 *   sender_request  sender-side request handle, kept with the message
 *   len             payload length in bytes
 *   type            send/receive type of the message
 *
 * If no memory is available the payload is discarded with
 * mx_trash_packet() and a note is written to stderr.  If reading the
 * payload fails (mx_sock_read() returns -1) the buffer is freed and the
 * receive socket is closed.  Otherwise the message is linked into
 * endpoint->queued_messages and endpoint->probe_cond is signalled.
 */
void
mx_queue_msg(struct mx_endpoint *endpoint,
             struct mx_address_desc *ep,
             uint64_t match_info,
             mx_request_t sender_request,
             uint32_t len,
             mx_sr_type_t type)
{
  struct mx_queued_msg *entry;
  int got;

  /* header plus room for len bytes of payload */
  entry = malloc(MX_QUEUED_MSG_SIZE(len));
  if (!entry) {
    fprintf(stderr, "trashing unexpected message len=%d - no room!\n", len);
    mx_trash_packet(ep->recv_sock, len);
    return;
  }

  /* record the envelope before pulling in the payload */
  entry->type = type;
  entry->length = len;
  entry->sender_request = sender_request;
  entry->match_info = match_info;
  entry->source = ep->address;

  /* pull the payload off the wire */
  got = mx_sock_read(ep->recv_sock, entry->data, len);
  if (got == -1) {
    free(entry);
    mx_close_recv_socket(endpoint, ep);
    return;
  }

  /* payload is safely stored: publish it and wake anyone probing */
  MX_LIST_INSERT(&endpoint->queued_messages, entry);
  MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("queued unexpected msg, match=0x%08x%08x, len=%d\n",
            MX_U32(match_info), MX_L32(match_info), len));
  pthread_cond_signal(&endpoint->probe_cond);
}

/*
 * Free every message linked on a circular queued-message list.
 *
 *   list  the list head; it is neither freed nor modified, so its link
 *         fields still refer to freed elements when this returns
 */
void
mx_destroy_queued_message_list(struct mx_queued_msg *list)
{
  struct mx_queued_msg *cur;
  struct mx_queued_msg *doomed;

  cur = list->next;
  while (cur != list) {
    /* step forward first: the link lives inside the element being freed */
    doomed = cur;
    cur = cur->next;
    free(doomed);
  }
}

/*
 * Try to satisfy a freshly posted receive from the endpoint's list of
 * already-buffered (queued) messages.
 *
 *   post  the receive being posted; its ts.recv part supplies the match
 *         criteria and the destination segments
 *   type  type the queued message must have; a queued ISSEND is treated
 *         as a SEND for this comparison
 *
 * Returns 0 when no queued message matches (nothing is changed), or 1
 * when a message was consumed.  In the latter case the message is
 * unlinked, its payload is copied into the receive segments (at most
 * recv->length bytes), the post's status is filled in, an ack is sent if
 * the message was an ISSEND, the message is freed, and the post is marked
 * complete and placed on endpoint->completed_posts.
 *
 * For a GET_DATA receive every segment must be approved by
 * mx_rdma_allowed(); if one is not, recv->length is forced to 0 so that
 * nothing is copied and the status is marked REJECTED.
 *
 * This function is called from main thread
 */
uint32_t
mx_match_queued_message(struct mx_post *post, mx_sr_type_t type)
{
  struct mx_endpoint *endpoint;
  struct mx_lib_recv *recv;
  struct mx_queued_msg *msg;
  const char *src;
  uint32_t nbytes;
  uint32_t n;
  uint32_t i;
  int rdma_fail = 0;

  endpoint = post->endpoint;
  recv = &post->ts.recv;

  /* search all queued messages for one matching */
  for (msg = endpoint->queued_messages.next;
       msg != &endpoint->queued_messages;
       msg = msg->next) {
    mx_sr_type_t tp;
    if (msg->type == MX_SR_TYPE_ISSEND){
      tp = MX_SR_TYPE_SEND;
    } else {
      tp = msg->type;
    }
    if (((msg->match_info & recv->match_mask) == recv->match_info) &&
        (tp == type)) {
      MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("Found queued message for match_key = 0x%08x%08x\n",
                MX_U32(recv->match_info), MX_L32(recv->match_info)));
      break;
    }
  }

  /* if not found, return false */
  if (msg ==  &endpoint->queued_messages) {
    return 0;
  }

  /* take the queued message out of the list */
  MX_LIST_REMOVE(msg);

  if (recv->type == MX_SR_TYPE_GET_DATA){
    /* every target segment must be open for remote writes */
    for(i = 0; i < recv->seg_cnt; i++){
      if (!mx_rdma_allowed(MX_UINT64(recv->seg_list[i].segment_ptr),
                           recv->seg_list[i].segment_length,
                           MX_RDMA_WRITE)){
        break;
      }
    }
    if (i != recv->seg_cnt){
      MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("RDMA not allowed\n"));
      recv->length = 0;	/* nothing will be copied below */
      rdma_fail = 1;
    } else {
      MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("RDMA allowed\n"));
    }
  }

  /*
   * Got a match, copy this queued message into the posted receive
   * buffers.  A byte pointer is used for the source because arithmetic
   * on void * is not standard C.  The walk is bounded by seg_cnt so a
   * segment list shorter than recv->length cannot be overrun, and empty
   * segments are stepped over instead of being retried forever.
   */
  src = (const char *)msg->data;
  nbytes = MIN(recv->length, msg->length);
  i = 0;
  while ((nbytes > 0) && (i < recv->seg_cnt)) {
    n = MIN(nbytes, recv->seg_list[i].segment_length);

    if (n > 0) {
      /* copy this lump of data over */
      memcpy(recv->seg_list[i].segment_ptr, src, n);
      nbytes -= n;	/* subtract bytes moved from bytes left */
      src += n;		/* move pointer */
    }
    ++i;		/* next segment */
  }

  /* update the post status structure */
  post->status.source = msg->source;
  post->status.msg_length =  msg->length;
  post->status.xfer_length = MIN(recv->length, msg->length);
  post->status.match_info = msg->match_info;
  if (rdma_fail){
    post->status.code = MX_STATUS_REJECTED;
  } else if (post->status.xfer_length < post->status.msg_length) {
    post->status.code = MX_STATUS_TRUNCATED;
  } else {
    post->status.code = MX_STATUS_SUCCESS;
  }

  /* a synchronous send is acknowledged once it has been matched */
  if (msg->type == MX_SR_TYPE_ISSEND){
    mx_post_issend_ack(endpoint, msg->source, msg->sender_request);
  }

  /* post is now complete */
  pthread_mutex_lock(&Mx_po_lock);
  post->complete = 1;
  /*
   * post->wait_cond is deliberately not signalled: per the original
   * author, this post has never been in the post queue.
   */

  /* all done with the message struct */
  free(msg);

  /* and finally, place the completed post on the end of the
   * completed post list
   */
  MX_LIST_INSERT(&endpoint->completed_posts, post);
  pthread_mutex_unlock(&Mx_po_lock);

  /* endpoint->peek_cond is likewise not signalled on this path */
  return 1;		/* return true */
}

/*
 * Try to complete a freshly posted barrier from the endpoint's list of
 * queued messages.
 *
 *   post  the barrier post; post->ts.barrier supplies the match_info and
 *         post->status.context holds the callback tuple
 *   type  type a queued entry must carry to be considered
 *
 * Returns 0 when no queued entry has both the barrier's exact match_info
 * and the requested type.  Otherwise the entry is unlinked, the tuple's
 * callback is invoked with (endpoint, tuple->post), the tuple and the
 * queued entry are freed, the post is handed to mx_destroy_post(), and
 * 1 is returned.
 *
 * called from main thread
 */
uint32_t
mx_match_queued_barrier(struct mx_post *post, mx_sr_type_t type)
{
  struct mx_endpoint *endpoint = post->endpoint;
  struct mx_lib_barrier *barrier = &post->ts.barrier;
  struct mx_queued_msg *cur;
  struct mx_queued_msg *hit = NULL;
  struct mx_barrier_callback_tuple *cb;

  /* scan the circular queue for the first entry that fits */
  cur = endpoint->queued_messages.next;
  while (cur != &endpoint->queued_messages) {
    if ((cur->match_info == barrier->match_info) &&
        (cur->type == type)) {
      MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("Found queued barrier for match_key = 0x%08x%08x\n",
                MX_U32(barrier->match_info), MX_L32(barrier->match_info)));
      hit = cur;
      break;
    }
    cur = cur->next;
  }

  /* nothing queued for this barrier */
  if (hit == NULL) {
    return 0;
  }

  /* unlink the entry before running user code */
  MX_LIST_REMOVE(hit);

  /* fire the completion callback, then release its bookkeeping */
  cb = post->status.context;
  cb->callback(endpoint, cb->post);
  free(cb);

  /* the queued entry and the post itself are no longer needed */
  free(hit);
  mx_destroy_post(post);

  return 1;
}

/*
 * Find the pending-barrier record for a given match_info and type on an
 * endpoint, creating and linking a fresh one when none exists.
 *
 *   endpoint    endpoint whose pending_barriers list is searched
 *   match_info  barrier identifier; must be equal to the record's
 *   len         unused
 *   type        post type the record must carry
 *
 * Returns the existing or newly created record.  A new record starts
 * with both counters at zero.  Returns NULL, and leaves the list
 * untouched, when a new record is needed but cannot be allocated.
 *
 * NOTE(review): a new record's type is always MX_POST_TYPE_BARRIER rather
 * than the 'type' argument, so a lookup with any other type would create
 * a new record on every call -- confirm that callers only pass
 * MX_POST_TYPE_BARRIER.
 */
struct mx_pending_barrier *
mx_get_pending_barrier(struct mx_endpoint *endpoint,
                       uint64_t match_info,
                       uint32_t len,
                       mx_post_type_t type)
{
  struct mx_pending_barrier *pend;

  (void)len;

  /* search all pending barriers for one matching */
  for (pend = endpoint->pending_barriers.next;
       pend != &endpoint->pending_barriers;
       pend = pend->next) {

    if ((match_info == pend->match_info) &&
        (type == pend->type)) {
      MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("Found matching pending barrier for match_data = 0x%08x%08x\n",
                MX_U32(match_info), MX_L32(match_info)));
      return pend;
    }
  }

  /* none yet: start a new record */
  pend = malloc(sizeof (*pend));
  if (pend == NULL) {
    /* never hand a NULL element to the list macro */
    return NULL;
  }

  pend->match_info = match_info;
  pend->type = MX_POST_TYPE_BARRIER;
  pend->current_count = 0;
  pend->total_count = 0;
  MX_LIST_INSERT(&endpoint->pending_barriers, pend);
  return pend;
}


/*
 * Look through an endpoint's list of posted barriers for the first one
 * with the given match_info and type.
 *
 *   endpoint    endpoint whose barrier_list is walked
 *   match_info  barrier identifier; compared for exact equality
 *   type        post type the barrier must carry
 *
 * Returns the matching post, or NULL when the list holds no match.
 */
struct mx_post *
mx_find_barrier(struct mx_endpoint *endpoint,
                uint64_t match_info,
                mx_post_type_t type)
{
  struct mx_post *cur;
  struct mx_lib_barrier *posted;

  /* walk the circular list until we are back at its head */
  cur = endpoint->barrier_list.next;
  while (cur != &endpoint->barrier_list) {
    posted = &cur->ts.barrier;

    if ((match_info != posted->match_info) ||
        (type != posted->type)) {
      cur = cur->next;
      continue;
    }

    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("Found matching receive for match_data = 0x%08x%08x\n",
              MX_U32(match_info), MX_L32(match_info)));
    return cur;
  }

  /* walked the whole list without a hit */
  return NULL;
}

/*
 * Handle one incoming barrier message.
 *
 *   endpoint    endpoint the message arrived on
 *   ep          peer descriptor; the payload is read from ep->recv_sock
 *   match_info  barrier identifier
 *   len         payload length in bytes
 *   type        post type used to look up the pending record and the
 *               posted barrier
 *
 * The payload is text holding the barrier's participant count as an
 * unsigned decimal number.  The first message for a barrier fixes the
 * expected total; a later message announcing a different total makes the
 * process exit.  Each message bumps the pending record's current count,
 * and when that reaches total - 1 the record is retired: if a matching
 * barrier post exists its callback is run and the post freed, otherwise a
 * placeholder entry is added to endpoint->queued_messages.
 *
 * Malformed input is rejected rather than trusted: a payload larger than
 * the 16-byte parse buffer is discarded with mx_trash_packet(), and a
 * payload that does not start with a number is ignored.  Allocation
 * failures are reported on stderr.
 */
void
mx_process_barrier(struct mx_endpoint *endpoint,
                   struct mx_address_desc *ep,
                   uint64_t match_info,
                   uint32_t len,
                   mx_post_type_t type)
{
  struct mx_pending_barrier *pend;
  int rc;
  char buf[16 + 1];	/* 16 payload bytes plus a forced terminator */
  unsigned int parsed = 0;
  uint32_t count;
  struct mx_post *post;
  struct mx_barrier_callback_tuple *tuple;

  /* len comes off the wire: never read more than the buffer can hold */
  if (len > sizeof(buf) - 1) {
    fprintf(stderr, "trashing oversized barrier message len=%u\n",
            (unsigned int)len);
    mx_trash_packet(ep->recv_sock, len);
    return;
  }

  pend = mx_get_pending_barrier(endpoint, match_info, len, type);
  if (pend == NULL) {
    /* no record could be created; keep the stream in step and give up */
    fprintf(stderr, "trashing barrier message - no room!\n");
    mx_trash_packet(ep->recv_sock, len);
    return;
  }

  /* slurp the data from the socket */
  rc = mx_sock_read(ep->recv_sock, buf, len);
  if (rc == -1) {
    mx_close_recv_socket(endpoint, ep);
    return;
  }
  buf[len] = '\0';	/* the sender is not trusted to terminate it */

  printf("[%s]\n", buf);
  if (sscanf(buf, "%u", &parsed) != 1) {
    fprintf(stderr, "ignoring malformed barrier message\n");
    return;
  }
  count = parsed;
  MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("count = %u\n", count));

  /* the first arrival defines the total; later ones must agree */
  if (pend->total_count == 0) {
    pend->total_count = count;
  }
  else if (pend->total_count != count) {
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("barrier count mismatch\n"));
    exit(1);
  }
  pend->current_count++;
  MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("current count = %u\n", pend->current_count));
  MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("total count = %u\n", pend->total_count));

  /* all peers but ourselves have been heard from */
  if (pend->current_count == pend->total_count-1) {
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("everyone has reendpointed in\n"));
    MX_LIST_REMOVE(pend);
    free(pend);
    post = mx_find_barrier(endpoint, match_info, type);
    if (post != NULL) {
      MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("found a barrier\n"));
      MX_LIST_REMOVE(post);

      tuple = post->status.context;
      tuple->callback(endpoint, tuple->post);
      free(tuple);

      /*
       * NOTE(review): mx_match_queued_barrier() releases its post with
       * mx_destroy_post(); a plain free() is used here -- confirm which
       * is intended.
       */
      free (post);
    }
    else {
      /* no barrier posted yet: leave a marker for the main thread */
      struct mx_queued_msg *msg;
      MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("queuing a barrier\n"));
      msg = calloc(1, sizeof(*msg));
      if (msg == NULL) {
        fprintf(stderr, "cannot queue completed barrier - no room!\n");
        return;
      }
      msg->match_info = match_info;
      msg->type = type;
      MX_LIST_INSERT(&endpoint->queued_messages, msg);
    }
  }
}


/*
 * Handle an incoming acknowledgement for a synchronous send.
 *
 *   endpoint  endpoint whose buffered_posts list holds the waiting send
 *   ep        peer descriptor; the ack payload is read from ep->recv_sock
 *
 * The payload is a request handle.  The post embedded in that request is
 * looked up on endpoint->buffered_posts; if it is not there the process
 * exits.  Otherwise the post is moved to endpoint->completed_posts, given
 * a SUCCESS status with both lengths set to the send length, flagged
 * complete, and endpoint->peek_cond and post->wait_cond are signalled.
 * List and status updates happen under Mx_po_lock.
 *
 * If reading the handle fails (mx_sock_read() returns -1) the receive
 * socket is closed and nothing else is done.
 */
void
mx_process_issend_ack(struct mx_endpoint *endpoint,
                      struct mx_address_desc *ep){
  mx_request_t acked;
  struct mx_post *cur;
  int got;

  got = mx_sock_read(ep->recv_sock, &acked, sizeof(acked));
  if (got == -1) {
    mx_close_recv_socket(endpoint, ep);
    return;
  }

  pthread_mutex_lock(&Mx_po_lock);

  /* the handle is only trusted if its post is really on our list */
  cur = endpoint->buffered_posts.next;
  while ((cur != &endpoint->buffered_posts) && (cur != &acked->post)) {
    cur = cur->next;
  }

  if (cur == &endpoint->buffered_posts){
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("No buffered issend\n"));
    exit(1);
  }

  /* buffered -> completed */
  MX_LIST_REMOVE(cur);
  MX_LIST_INSERT(&endpoint->completed_posts, cur);

  /* the whole send was delivered */
  cur->status.code = MX_STATUS_SUCCESS;
  cur->status.msg_length = cur->ts.send.length;
  cur->status.xfer_length = cur->ts.send.length;

  MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("received issend ack\n"));
  cur->complete = 1;

  /* wake both a peek on the endpoint and a wait on this post */
  pthread_cond_signal(&endpoint->peek_cond);
  pthread_cond_signal(&cur->wait_cond);
  pthread_mutex_unlock(&Mx_po_lock);
}

/*
 * Handle an incoming acknowledgement for a put/get operation.
 *
 *   endpoint  endpoint whose putget_posts list holds the waiting post
 *   ep        peer descriptor; the ack payload is read from ep->recv_sock
 *
 * The payload is a request handle followed by a status code.  The post
 * embedded in that request is looked up on endpoint->putget_posts; if it
 * is not there the process exits.  Otherwise the post is moved to
 * endpoint->completed_posts, its status becomes REJECTED when the
 * received code is non-zero and SUCCESS otherwise, both lengths are set
 * to the send length, the post is flagged complete, and post->wait_cond
 * and endpoint->peek_cond are signalled.  List and status updates happen
 * under Mx_po_lock.
 *
 * If either read fails (mx_sock_read() returns -1) the receive socket is
 * closed and nothing else is done.
 */
void
mx_process_put_ack(struct mx_endpoint *endpoint,
                   struct mx_address_desc *ep){
  mx_request_t acked;
  mx_status_code_t remote_err;
  struct mx_post *cur;
  int got;

  /* handle first, then the remote status; stop at the first failure */
  got = mx_sock_read(ep->recv_sock, &acked, sizeof(acked));
  if (got != -1) {
    got = mx_sock_read(ep->recv_sock, &remote_err, sizeof(remote_err));
  }
  if (got == -1) {
    mx_close_recv_socket(endpoint, ep);
    return;
  }

  pthread_mutex_lock(&Mx_po_lock);

  /* the handle is only trusted if its post is really on our list */
  cur = endpoint->putget_posts.next;
  while ((cur != &endpoint->putget_posts) && (cur != &acked->post)) {
    cur = cur->next;
  }

  if (cur == &endpoint->putget_posts){
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("No buffered put/get\n"));
    exit(1);
  }

  /* pending -> completed */
  MX_LIST_REMOVE(cur);
  MX_LIST_INSERT(&endpoint->completed_posts, cur);

  /* any non-zero remote code means the peer refused the operation */
  cur->status.code = remote_err ? MX_STATUS_REJECTED : MX_STATUS_SUCCESS;
  cur->status.msg_length = cur->ts.send.length;
  cur->status.xfer_length = cur->ts.send.length;

  MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("received put ack\n"));
  cur->complete = 1;

  /* wake both a wait on this post and a peek on the endpoint */
  pthread_cond_signal(&cur->wait_cond);
  pthread_cond_signal(&endpoint->peek_cond);
  pthread_mutex_unlock(&Mx_po_lock);
}


/*
 * Send an acknowledgement for a synchronous send back to its sender.
 *
 *   endpoint  local endpoint to send from
 *   dst       address of the peer that issued the synchronous send
 *   request   the sender's request handle, echoed back as the payload
 *
 * The handle is copied into a function-static variable and sent as a
 * single segment with type MX_SR_TYPE_ISSEND_ACK.  The result of
 * mx_isend_with_type() and the request it produces are not examined;
 * the function always returns MX_SUCCESS.
 *
 * NOTE(review): the payload lives in one static variable shared by every
 * call -- presumably so that it outlives this function while the send is
 * in flight.  A second call made before the first send has consumed the
 * buffer, or a call from another thread, would overwrite it -- confirm
 * whether that can happen.
 */
mx_return_t
mx_post_issend_ack(mx_endpoint_t endpoint, 
                   mx_endpoint_addr_t dst,
                   mx_request_t request)
{
  static mx_request_t ack_payload;
  mx_segment_t ack_seg;
  mx_request_t ack_req;

  /* stage the handle where the send can still reach it after we return */
  ack_payload = request;
  ack_seg.segment_length = sizeof(ack_payload);
  ack_seg.segment_ptr = &ack_payload;

  MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("sending issend ack\n"));
  mx_isend_with_type(endpoint, &ack_seg, 1, dst, 0, NULL, &ack_req,
                     MX_SR_TYPE_ISSEND_ACK, 0, 0, 0);

  return MX_SUCCESS;
}

